<!DOCTYPE html>



  


<html class="theme-next gemini use-motion" lang="zh-Hans">
<head>
  <meta charset="UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<!-- No maximum-scale here: capping the zoom level stops low-vision users from pinch-zooming the page. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="theme-color" content="#222">









<meta http-equiv="Cache-Control" content="no-transform" />
<meta http-equiv="Cache-Control" content="no-siteapp" />
















  
  
  <!-- Vendor styles (fancybox, Font Awesome) load before the theme stylesheet so the theme can override them. -->
  <link rel="stylesheet" href="/lib/fancybox/source/jquery.fancybox.css?v=2.1.5">
  <link rel="stylesheet" href="/lib/font-awesome/css/font-awesome.min.css?v=4.6.2">
  <link rel="stylesheet" href="/css/main.css?v=5.1.4">


  <link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon-next.png?v=5.1.4">


  <link rel="icon" type="image/png" sizes="32x32" href="/images/favicon-32x32-next.png?v=5.1.4">


  <link rel="icon" type="image/png" sizes="16x16" href="/images/favicon-16x16-next.png?v=5.1.4">


  <link rel="mask-icon" href="/images/logo.svg?v=5.1.4" color="#222">





  <!-- Page metadata: keywords, description, Open Graph and Twitter card tags. -->
  <!-- Keywords are declared once, without the trailing comma the generator emitted. -->
  <meta name="keywords" content="spider_taobao">
  <!-- Descriptions are trimmed of the code-listing gutter residue ("12345pip install ...") and a stray zero-width space. -->
  <meta name="description" content="作者:李忠林 Github: https://github.com/Leezhonglin Gitblog: https://leezhonglin.github.io/ 使用自动化测试库,来模拟浏览器获取淘宝搜索页面的相关信息.首先我们需要安装selenium | BeautifulSoup4 | pymongo 库.">
  <meta property="og:type" content="article">
  <meta property="og:title" content="使用selenium|bs4爬取淘宝商品信息">
  <!-- NOTE(review): og:url still points at the placeholder host "yoursite.com"; presumably the site URL was never configured. Confirm and regenerate. -->
  <meta property="og:url" content="http://yoursite.com/2018/06/30/使用selenium-bs4爬取淘宝商品信息/index.html">
  <meta property="og:site_name" content="Mr. Lee&#39;s blog">
  <meta property="og:description" content="作者:李忠林 Github: https://github.com/Leezhonglin Gitblog: https://leezhonglin.github.io/ 使用自动化测试库,来模拟浏览器获取淘宝搜索页面的相关信息.首先我们需要安装selenium | BeautifulSoup4 | pymongo 库.">
  <meta property="og:locale" content="zh-Hans">
  <meta property="og:updated_time" content="2018-06-30T06:19:47.000Z">
  <meta name="twitter:card" content="summary">
  <meta name="twitter:title" content="使用selenium|bs4爬取淘宝商品信息">
  <meta name="twitter:description" content="作者:李忠林 Github: https://github.com/Leezhonglin Gitblog: https://leezhonglin.github.io/ 使用自动化测试库,来模拟浏览器获取淘宝搜索页面的相关信息.首先我们需要安装selenium | BeautifulSoup4 | pymongo 库.">



<script type="text/javascript" id="hexo.configurations">
  // Global configuration objects for the page. Both are declared with `var`
  // at script top level, so later scripts can read them as globals.

  // Reuse an existing window.NexT object if one is already defined,
  // otherwise start from an empty object.
  var NexT = window.NexT || {};

  // Static settings serialized into the page as an object literal.
  var CONFIG = {
    root: '/',
    scheme: 'Gemini',
    version: '5.1.4',
    // Sidebar placement and display options.
    sidebar: {"position":"left","display":"post","offset":12,"b2t":false,"scrollpercent":false,"onmobile":false},
    fancybox: true,
    tabs: true,
    // Animation switch plus the named transition used for each page region.
    motion: {"enable":true,"async":false,"transition":{"post_block":"fadeIn","post_header":"slideDownIn","post_body":"slideDownIn","coll_header":"slideLeftIn","sidebar":"slideUpIn"}},
    duoshuo: {
      userId: '0',
      author: 'Author'
    },
    // Algolia credentials are all empty strings on this page.
    algolia: {
      applicationID: '',
      apiKey: '',
      indexName: '',
      hits: {"per_page":10},
      // The ${...} tokens sit inside ordinary string literals, so they are kept
      // verbatim here; presumably a search script substitutes them later (TODO confirm).
      labels: {"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}
    }
  };
</script>



  <link rel="canonical" href="http://yoursite.com/2018/06/30/使用selenium-bs4爬取淘宝商品信息/"/>





  <title>使用selenium|bs4爬取淘宝商品信息 | Mr. Lee's blog</title>
  








</head>

<body itemscope itemtype="http://schema.org/WebPage" lang="zh-Hans">

  
  
    
  

  <div class="container sidebar-position-left page-post-detail">
    <div class="headband"></div>

    <header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-wrapper">
  <div class="site-meta ">
    

    <div class="custom-logo-site-title">
      <a href="/"  class="brand" rel="start">
        <span class="logo-line-before"><i></i></span>
        <span class="site-title">Mr. Lee's blog</span>
        <span class="logo-line-after"><i></i></span>
      </a>
    </div>
      
        <h1 class="site-subtitle" itemprop="description">知识改变命运,技术成就梦想</h1>
      
  </div>

  <div class="site-nav-toggle">
    <!-- Icon-only control: aria-label gives it an accessible name, and
         type="button" keeps it from acting as a submit button if it ever ends up inside a form. -->
    <button type="button" aria-label="切换导航栏">
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
    </button>
  </div>
</div>

<nav class="site-nav">
  

  
    <ul id="menu" class="menu">
      
        
        <li class="menu-item menu-item-home">
          <a href="/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-home"></i> <br />
            
            首页
          </a>
        </li>
      
        
        <li class="menu-item menu-item-archives">
          <a href="/archives/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-archive"></i> <br />
            
            归档
          </a>
        </li>
      
        
        <li class="menu-item menu-item-tags">
          <a href="/tags/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-tags"></i> <br />
            
            标签
          </a>
        </li>
      
        
        <li class="menu-item menu-item-about">
          <a href="/about/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-user"></i> <br />
            
            关于
          </a>
        </li>
      
        
        <li class="menu-item menu-item-commonweal">
          <a href="/404.html" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-heartbeat"></i> <br />
            
            公益404
          </a>
        </li>
      

      
        <li class="menu-item menu-item-search">
          
            <a href="javascript:;" class="popup-trigger">
          
            
              <i class="menu-item-icon fa fa-search fa-fw"></i> <br />
            
            搜索
          </a>
        </li>
      
    </ul>
  

  
    <div class="site-search">
      
  <div class="popup search-popup local-search-popup">
  <div class="local-search-header clearfix">
    <span class="search-icon">
      <!-- Decorative magnifier icon: hidden from assistive technology. -->
      <i class="fa fa-search" aria-hidden="true"></i>
    </span>
    <span class="popup-btn-close">
      <i class="fa fa-times-circle"></i>
    </span>
    <div class="local-search-input-wrapper">
      <!-- A placeholder is not an accessible name, so the field is labelled explicitly.
           The id and type are unchanged. -->
      <input autocomplete="off"
             placeholder="搜索..." spellcheck="false"
             type="text" id="local-search-input"
             aria-label="搜索">
    </div>
  </div>
  <div id="local-search-result"></div>
</div>



    </div>
  
</nav>



 </div>
    </header>

    <main id="main" class="main">
      <div class="main-inner">
        <div class="content-wrap">
          <div id="content" class="content">
            

  <div id="posts" class="posts-expand">
    

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://yoursite.com/2018/06/30/使用selenium-bs4爬取淘宝商品信息/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="Mr. Lee">
      <meta itemprop="description" content="">
      <meta itemprop="image" content="/avatar.png">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="Mr. Lee's blog">
    </span>

    
      <header class="post-header">

        
        
          <h2 class="post-title" itemprop="name headline">使用selenium|bs4爬取淘宝商品信息</h2>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2018-06-30T14:18:50+08:00">
                2018-06-30
              </time>
            

            

            
          </span>

          

          
            
          

          
          

          

          
            <div class="post-wordcount">
              
                
                <span class="post-meta-item-icon">
                  <i class="fa fa-file-word-o"></i>
                </span>
                
                  <span class="post-meta-item-text">字数统计&#58;</span>
                
                <span title="字数统计">
                  948
                </span>
              

              
                <span class="post-meta-divider">|</span>
              

              
                <span class="post-meta-item-icon">
                  <i class="fa fa-clock-o"></i>
                </span>
                
                  <span class="post-meta-item-text">阅读时长 &asymp;</span>
                
                <span title="阅读时长">
                  5
                </span>
              
            </div>
          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        <blockquote>
<p>作者:李忠林</p>
<p>Github: <a href="https://github.com/Leezhonglin" target="_blank" rel="noopener">https://github.com/Leezhonglin</a></p>
<p>Gitblog: <a href="https://leezhonglin.github.io/" target="_blank" rel="noopener">https://leezhonglin.github.io/</a></p>
</blockquote>
<p>使用自动化测试库,来模拟浏览器获取淘宝搜索页面的相关信息.首先我们需要安装selenium | BeautifulSoup4 | pymongo 库.安装方法.</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line">pip install selenium</span><br><span class="line"></span><br><span class="line">pip install beautifulsoup4</span><br><span class="line"></span><br><span class="line">pip install pymongo</span><br></pre></td></tr></table></figure>
<p>安装好之后我们需要检查一下是否都安装成功.检查方法是在虚拟环境中输入</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line">(venv) deMacBook-Pro:spider00 lizhonglin$ pip freeze </span><br><span class="line"></span><br><span class="line">selenium==<span class="number">3.13</span><span class="number">.0</span></span><br><span class="line">beautifulsoup4==<span class="number">4.6</span><span class="number">.0</span></span><br><span class="line">pymongo==<span class="number">3.7</span><span class="number">.0</span></span><br></pre></td></tr></table></figure>
<p>能看见这几个东西就表明我们的库已经成功安装了.</p>
<p>接下来我们就可以开始我们的代码了.首先我们要理清楚解决问题的思路.有了思路才能开始写我们的代码.  分析问题 → 获取解决问题的方法 → 解决问题.就有了我们下面的代码.</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span class="line">60</span><br><span 
class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br><span class="line">72</span><br><span class="line">73</span><br><span class="line">74</span><br><span class="line">75</span><br><span class="line">76</span><br><span class="line">77</span><br><span class="line">78</span><br><span class="line">79</span><br><span class="line">80</span><br><span class="line">81</span><br><span class="line">82</span><br><span class="line">83</span><br><span class="line">84</span><br><span class="line">85</span><br><span class="line">86</span><br><span class="line">87</span><br><span class="line">88</span><br><span class="line">89</span><br><span class="line">90</span><br><span class="line">91</span><br><span class="line">92</span><br><span class="line">93</span><br><span class="line">94</span><br><span class="line">95</span><br><span class="line">96</span><br><span class="line">97</span><br><span class="line">98</span><br><span class="line">99</span><br><span class="line">100</span><br><span class="line">101</span><br><span class="line">102</span><br><span class="line">103</span><br><span class="line">104</span><br><span class="line">105</span><br><span class="line">106</span><br><span class="line">107</span><br><span class="line">108</span><br><span class="line">109</span><br><span class="line">110</span><br><span class="line">111</span><br><span class="line">112</span><br><span class="line">113</span><br><span class="line">114</span><br><span class="line">115</span><br><span class="line">116</span><br><span class="line">117</span><br><span class="line">118</span><br><span class="line">119</span><br><span class="line">120</span><br><span class="line">121</span><br><span 
class="line">122</span><br><span class="line">123</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">import</span> time</span><br><span class="line"><span class="keyword">from</span> selenium <span class="keyword">import</span> webdriver</span><br><span class="line"><span class="keyword">from</span> bs4 <span class="keyword">import</span> BeautifulSoup</span><br><span class="line"><span class="keyword">from</span> pymongo <span class="keyword">import</span> MongoClient</span><br><span class="line"></span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">def</span> <span class="title">save_data_mongodb</span><span class="params">(data)</span>:</span></span><br><span class="line">    <span class="string">"""</span></span><br><span class="line"><span class="string">    保存数据到mongodb</span></span><br><span class="line"><span class="string">    :param data: 传入需要保存的数据</span></span><br><span class="line"><span class="string">    :return: 无</span></span><br><span class="line"><span class="string">    """</span></span><br><span class="line">    <span class="comment"># 链接数据库</span></span><br><span class="line">    conn = MongoClient(<span class="string">'mongodb://127.0.0.1:27017'</span>)</span><br><span class="line">    <span class="comment"># 切换到taobao的数据库</span></span><br><span class="line">    db = conn.taobao</span><br><span class="line">    <span class="comment"># 插入数据</span></span><br><span class="line">    db.goods_info.insert(data)</span><br><span class="line"></span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">def</span> <span class="title">parse_taobao_goods_info_html</span><span class="params">(html)</span>:</span></span><br><span class="line">    <span class="string">"""</span></span><br><span class="line"><span class="string">    解析页面信息</span></span><br><span class="line"><span class="string">    :param html: 
需要解析的页面</span></span><br><span class="line"><span class="string">    """</span></span><br><span class="line">    soup = BeautifulSoup(html, <span class="string">'lxml'</span>)</span><br><span class="line">    div_list = soup.find_all(<span class="string">'div'</span>, <span class="string">'J_MouserOnverReq'</span>)</span><br><span class="line">    <span class="keyword">for</span> div <span class="keyword">in</span> div_list:</span><br><span class="line">        <span class="comment"># 获取图片和名称</span></span><br><span class="line">        imgs = div.find(<span class="string">'img'</span>, <span class="string">'J_ItemPic'</span>)</span><br><span class="line">        image_link = <span class="string">'https:'</span> + imgs.attrs.get(<span class="string">'data-src'</span>)</span><br><span class="line">        name = imgs.attrs.get(<span class="string">'alt'</span>)</span><br><span class="line">        <span class="comment"># print('商品图片链接:'+image_link)</span></span><br><span class="line">        <span class="comment"># print('商品名称:'+ name)</span></span><br><span class="line">        <span class="comment"># 获取商品的价格</span></span><br><span class="line">        price = div.find(<span class="string">'div'</span>, <span class="string">'g_price-highlight'</span>).find(<span class="string">'strong'</span>).text</span><br><span class="line">        <span class="comment"># print('价格:'+price)</span></span><br><span class="line">        <span class="comment"># 获取商品的销量</span></span><br><span class="line">        sales = div.find(<span class="string">'div'</span>, <span class="string">'deal-cnt'</span>).text.split(<span class="string">'人'</span>)[<span class="number">0</span>]</span><br><span class="line">        <span class="comment"># print('销量:'+ sales)</span></span><br><span class="line">        <span class="comment"># 获取商品地区信息</span></span><br><span class="line">        location = div.find(<span class="string">'div'</span>, <span 
class="string">'location'</span>).text</span><br><span class="line">        <span class="comment"># print('地区:'+ location)</span></span><br><span class="line">        <span class="comment"># 获取商家信息</span></span><br><span class="line">        <span class="keyword">try</span>:</span><br><span class="line">            business_info = div.find(<span class="string">'a'</span>, <span class="string">'shopname'</span>).find_all(<span class="string">'span'</span>)[<span class="number">-1</span>].text</span><br><span class="line">        <span class="keyword">except</span>:</span><br><span class="line">            business_info = div.find(<span class="string">'a'</span>, <span class="string">'shopname'</span>).text</span><br><span class="line"></span><br><span class="line">        <span class="comment"># print('商家信息:' + business_info)</span></span><br><span class="line">        result = &#123;</span><br><span class="line">            <span class="string">'name'</span>: name,</span><br><span class="line">            <span class="string">'image_link'</span>: image_link,</span><br><span class="line">            <span class="string">'price'</span>: price,</span><br><span class="line">            <span class="string">'sales'</span>: sales,</span><br><span class="line">            <span class="string">'location'</span>: location,</span><br><span class="line">            <span class="string">'business_info'</span>: business_info</span><br><span class="line">        &#125;</span><br><span class="line">        <span class="comment"># 插入数据</span></span><br><span class="line">        save_data_mongodb(result)</span><br><span class="line"></span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">def</span> <span class="title">get_taobao_html</span><span class="params">(url, keyword)</span>:</span></span><br><span class="line">    <span class="string">"""</span></span><br><span class="line"><span class="string">    
爬取原始页面</span></span><br><span class="line"><span class="string">    :param url: 爬取页面的url</span></span><br><span class="line"><span class="string">    :param keyword:  需要爬取的关键字</span></span><br><span class="line"><span class="string">    :return: 爬取完的所有数据</span></span><br><span class="line"><span class="string">    """</span></span><br><span class="line">    browser = webdriver.Chrome()</span><br><span class="line">    browser.get(url)</span><br><span class="line">    time.sleep(<span class="number">3</span>)</span><br><span class="line">    <span class="comment"># 模拟在输入框内输入数据</span></span><br><span class="line">    browser.find_element_by_id(<span class="string">'q'</span>).send_keys(keyword)</span><br><span class="line">    time.sleep(<span class="number">4</span>)</span><br><span class="line">    <span class="comment"># 模拟点击搜索</span></span><br><span class="line">    browser.find_element_by_class_name(<span class="string">'btn-search'</span>).click()</span><br><span class="line">    time.sleep(<span class="number">2</span>)</span><br><span class="line"></span><br><span class="line">    <span class="comment"># 获取到有多少页的搜索结果</span></span><br><span class="line">    total_page = browser.find_element_by_xpath(<span class="string">'//*[@id="mainsrp-pager"]/div/div/div/div[1]'</span>).text.split()[<span class="number">1</span>]</span><br><span class="line">    <span class="comment"># 获取页面资源</span></span><br><span class="line">    print(<span class="string">'正在获取第一页数据'</span>)</span><br><span class="line">    html_source_one = browser.page_source</span><br><span class="line">    time.sleep(<span class="number">8</span>)</span><br><span class="line">    <span class="comment"># print(html_source_one)</span></span><br><span class="line">    print(<span class="string">'正在解析第一页数据'</span>)</span><br><span class="line">    parse_taobao_goods_info_html(html_source_one)</span><br><span class="line">    time.sleep(<span class="number">8</span>)</span><br><span class="line">    
print(<span class="string">'第一页数据插入完成'</span>)</span><br><span class="line"></span><br><span class="line">    <span class="comment"># for page in range(int(total_page)):</span></span><br><span class="line">    <span class="keyword">for</span> page <span class="keyword">in</span> range(<span class="number">4</span>):</span><br><span class="line">        <span class="comment"># 判断右下角页面输入框的值,来确定当前页</span></span><br><span class="line">        current_page = int(</span><br><span class="line">            browser.find_element_by_xpath(<span class="string">'//*[@id="mainsrp-pager"]/div/div/div/div[2]/input'</span>).get_attribute(<span class="string">'value'</span>))</span><br><span class="line">        <span class="keyword">if</span> current_page != page:</span><br><span class="line">            bottom = <span class="string">"window.scrollTo(0, document.body.scrollHeight)"</span></span><br><span class="line">            browser.execute_script(bottom)</span><br><span class="line">            time.sleep(<span class="number">1</span>)</span><br><span class="line">            <span class="comment"># 修改要爬取的页面值</span></span><br><span class="line">            browser.find_element_by_xpath(<span class="string">'//*[@id="mainsrp-pager"]/div/div/div/div[2]/input'</span>).clear()</span><br><span class="line">            browser.find_element_by_xpath(<span class="string">'//*[@id="mainsrp-pager"]/div/div/div/div[2]/input'</span>).send_keys(current_page)</span><br><span class="line">            <span class="comment"># 提交要爬取的页面</span></span><br><span class="line">            browser.find_element_by_xpath(<span class="string">'//*[@id="mainsrp-pager"]/div/div/div/div[2]/span[3]'</span>).click()</span><br><span class="line">            time.sleep(<span class="number">3</span>)</span><br><span class="line">            print(<span class="string">'++++++'</span> * <span class="number">10</span>)</span><br><span class="line">            print(<span class="string">'正在获取第%d页数...'</span> % 
current_page)</span><br><span class="line">            <span class="comment"># 获取当前页面的源码</span></span><br><span class="line">            html_source = browser.page_source</span><br><span class="line">            time.sleep(<span class="number">8</span>)</span><br><span class="line">            <span class="comment"># 解析获取的页面</span></span><br><span class="line">            print(<span class="string">'正在解析第%d页数...'</span> % current_page)</span><br><span class="line">            parse_taobao_goods_info_html(html_source)</span><br><span class="line">            time.sleep(<span class="number">8</span>)</span><br><span class="line">            print(<span class="string">'第%d页数据插入完成......'</span> % current_page)</span><br><span class="line">    browser.close()</span><br><span class="line"></span><br><span class="line"></span><br><span class="line"><span class="keyword">if</span> __name__ == <span class="string">'__main__'</span>:</span><br><span class="line">    keyword = input(<span class="string">'请输入要搜索的关键字:'</span>)</span><br><span class="line">    url = <span class="string">'https://www.taobao.com/'</span></span><br><span class="line">    get_taobao_html(url, keyword)</span><br></pre></td></tr></table></figure>
      
    </div>
    
    
    

    

    
      <div>
        <div style="padding: 10px 0; margin: 20px auto; width: 90%; text-align: center;">
  <div></div>
  <!-- Reward toggle: shows or hides the payment QR code below.
       The invalid disable="enable" attribute is dropped and type="button" is added.
       aria-controls and aria-expanded expose the disclosure state to assistive technology. -->
  <button id="rewardButton" type="button" aria-controls="QR" aria-expanded="false">
    <span>打赏</span>
  </button>
  <div id="QR" style="display: none;">

      <div id="wechat" style="display: inline-block">
        <img id="wechat_qr" src="/WechatIMG26.jpeg" alt="Mr. Lee 微信支付"/>
        <p>微信支付</p>
      </div>

  </div>
  <script>
    // Bound with addEventListener instead of an inline onclick so the handler
    // can also keep aria-expanded in sync with the panel's visibility.
    (function () {
      var button = document.getElementById('rewardButton');
      var panel = document.getElementById('QR');
      if (!button || !panel) {
        return;
      }
      button.addEventListener('click', function () {
        // The panel starts with an inline display:none, so the first click shows it.
        var show = panel.style.display === 'none';
        panel.style.display = show ? 'block' : 'none';
        button.setAttribute('aria-expanded', show ? 'true' : 'false');
      });
    })();
  </script>
</div>

      </div>
    

    

    <div>
      
        <div>
    
        <div style="text-align:center;color: #ccc;font-size:14px;">
            -------------本文结束
            <i class="fa fa-paw"></i>
            感谢您的阅读-------------
        </div>
    
</div>
      
    </div>

    <footer class="post-footer">
      
        <div class="post-tags">
          
            <a href="/tags/spider-taobao/" rel="tag"><i class="fa fa-tag"></i> spider_taobao</a>
          
        </div>
      

      
      
      

      
        <div class="post-nav">
          <div class="post-nav-next post-nav-item">
            
              <a href="/2018/06/30/mongodb常用命令/" rel="next" title="mongodb常用命令">
                <i class="fa fa-chevron-left"></i> mongodb常用命令
              </a>
            
          </div>

          <span class="post-nav-divider"></span>

          <div class="post-nav-prev post-nav-item">
            
              <a href="/2018/07/02/Scrapy框架的使用/" rel="prev" title="Scrapy框架的使用">
                Scrapy框架的使用 <i class="fa fa-chevron-right"></i>
              </a>
            
          </div>
        </div>
      

      
      
    </footer>
  </div>
  
  
  
  </article>



    <div class="post-spread">
      
    </div>
  </div>


          </div>
          


          

  



        </div>
        
          
  
  <div class="sidebar-toggle">
    <div class="sidebar-toggle-line-wrap">
      <span class="sidebar-toggle-line sidebar-toggle-line-first"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-middle"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-last"></span>
    </div>
  </div>

  <aside id="sidebar" class="sidebar">
    
    <div class="sidebar-inner">

      

      

      <section class="site-overview-wrap sidebar-panel sidebar-panel-active">
        <div class="site-overview">
          <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
            
              <img class="site-author-image" itemprop="image"
                src="/avatar.png"
                alt="Mr. Lee" />
            
              <p class="site-author-name" itemprop="name">Mr. Lee</p>
              <p class="site-description motion-element" itemprop="description"></p>
          </div>

          <nav class="site-state motion-element">

            
              <div class="site-state-item site-state-posts">
              
                <a href="/archives/">
              
                  <span class="site-state-item-count">33</span>
                  <span class="site-state-item-name">日志</span>
                </a>
              </div>
            

            

            
              
              
              <div class="site-state-item site-state-tags">
                <a href="/tags/index.html">
                  <span class="site-state-item-count">30</span>
                  <span class="site-state-item-name">标签</span>
                </a>
              </div>
            

          </nav>

          

          
            <!-- Author contact links. Links that open a new tab carry rel="noopener" so the
                 opened page gets no window.opener handle. The mailto: link has no target,
                 because a new tab for a mail client only leaves an empty tab behind. -->
            <div class="links-of-author motion-element">

                  <span class="links-of-author-item">
                    <a href="https://github.com/Leezhonglin" target="_blank" rel="noopener" title="GitHub">

                        <i class="fa fa-fw fa-github"></i>GitHub</a>
                  </span>

                  <span class="links-of-author-item">
                    <a href="mailto:380604322@qq.com" title="E-Mail">

                        <i class="fa fa-fw fa-envelope"></i>E-Mail</a>
                  </span>

            </div>
          

          
          

          
          
            <!-- Friend links (blogroll). Items are <li> elements because <ul> may only contain
                 list items; the original class is kept so class-based styling still applies.
                 External links opened in a new tab carry rel="noopener". -->
            <div class="links-of-blogroll motion-element links-of-blogroll-block">
              <div class="links-of-blogroll-title">
                <i class="fa  fa-fw fa-link"></i>
                友情链接
              </div>

              <ul class="links-of-blogroll-list">

                    <li class="links-of-author-item" style="text-align:center">
                      <a href="https://my.csdn.net/jackfrued/" title="大 神" target="_blank" rel="noopener">
                        大 神
                      </a>
                    </li>

                    <li class="links-of-author-item" style="text-align:center">
                      <a href="https://blog.csdn.net/qq_33196814" title="CSDN" target="_blank" rel="noopener">
                        CSDN
                      </a>
                    </li>

                    <li class="links-of-author-item" style="text-align:center">
                      <a href="http://www.python.org" title="python" target="_blank" rel="noopener">
                        python
                      </a>
                    </li>

                    <li class="links-of-author-item" style="text-align:center">
                      <a href="http://redisdoc.com/" title="redis" target="_blank" rel="noopener">
                        redis
                      </a>
                    </li>

              </ul>


            </div>
          

          

        </div>
      </section>

      

      

    </div>
  </aside>


        
      </div>
    </main>

    <!-- Site footer: copyright line, author credit, visitor counters and the site word count.
         The counter spans (#busuanzi_value_site_uv / #busuanzi_value_site_pv) are empty in the
         markup; presumably the busuanzi script below fills them in at runtime (confirm). -->
    <footer id="footer" class="footer">
      <div class="footer-inner">
        <!-- Loaded over an explicit https: URL so the third-party script is never fetched in clear text. -->
        <script async src="https://busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script>
<div class="copyright">&copy; <span itemprop="copyrightYear">2019</span>
  <span class="with-love">
    <i class="fa fa-user"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">Mr. Lee</span>

  
</div>


  <span class="post-meta-divider">|</span>



  <!-- Opens in a new tab; rel="noopener noreferrer" stops the opened page from using window.opener. -->
  <div class="powered-by">由 <a class="theme-link" target="_blank" rel="noopener noreferrer" href="https://github.com/Leezhonglin">Leezhonglin</a> 提供技术支持</div>



  <span class="post-meta-divider">|</span>


<div class="powered-by">
<i class="fa fa-user-md"></i><span id="busuanzi_container_site_uv">
  本站访客数:<span id="busuanzi_value_site_uv"></span>
</span>


  <span class="post-meta-divider">|</span>


<span id="busuanzi_container_site_pv">
    本站总访问量:<span id="busuanzi_value_site_pv"></span>次
</span>


  <span class="post-meta-divider">|</span>


</div>


<div class="theme-info">
  <div class="powered-by"></div>
  <span class="post-count">博客全站共:64.5k字</span>
</div>


  <span class="post-meta-divider">|</span>




        

        
      </div>
    </footer>

    
      <!-- Back-to-top control: an icon-only container with no text label.
           No behaviour is attached in this markup; presumably a theme script binds the
           click handler to the .back-to-top class (confirm against the theme sources). -->
      <div class="back-to-top">
        <i class="fa fa-arrow-up"></i>
        
      </div>
    

    

  </div>

  

<script type="text/javascript">
  // Null out window.Promise unless Object.prototype.toString reports it as
  // '[object Function]', i.e. unless it is a callable constructor.
  (function (root) {
    var promiseTag = Object.prototype.toString.call(root.Promise);
    var isFunction = promiseTag === '[object Function]';
    if (!isFunction) {
      root.Promise = null;
    }
  })(window);
</script>









  












  
  
    <!-- Third-party libraries. These are classic (non-async, non-deferred) scripts, so they
         execute in the order listed; jQuery comes first. -->
    <script src="/lib/jquery/index.js?v=2.1.3"></script>
    <script src="/lib/fastclick/lib/fastclick.min.js?v=1.0.6"></script>
    <script src="/lib/jquery_lazyload/jquery.lazyload.js?v=1.9.7"></script>
    <script src="/lib/velocity/velocity.min.js?v=1.2.1"></script>
    <script src="/lib/velocity/velocity.ui.min.js?v=1.2.1"></script>
    <script src="/lib/fancybox/source/jquery.fancybox.pack.js?v=2.1.5"></script>

    <!-- Theme scripts (v5.1.4), kept in their original order. -->
    <script src="/js/src/utils.js?v=5.1.4"></script>
    <script src="/js/src/motion.js?v=5.1.4"></script>
    <script src="/js/src/affix.js?v=5.1.4"></script>
    <script src="/js/src/schemes/pisces.js?v=5.1.4"></script>
    <script src="/js/src/scrollspy.js?v=5.1.4"></script>
    <script src="/js/src/post-details.js?v=5.1.4"></script>
    <script src="/js/src/bootstrap.js?v=5.1.4"></script>



  


  




	





  





  












  

  <script type="text/javascript">
    // Popup Window;
    var isfetched = false;
    var isXml = true;
    // Search DB path;
    var search_path = "search.xml";
    if (search_path.length === 0) {
      search_path = "search.xml";
    } else if (/json$/i.test(search_path)) {
      isXml = false;
    }
    var path = "/" + search_path;
    // monitor main search box;

    // Dismiss the search popup: hide it, clear the query box, drop any rendered
    // results together with the overlay, and restore the body's overflow.
    var onPopupClose = function (e) {
      $('.popup').hide();
      $('#local-search-input').val('');
      $('.search-result-list, #no-result, .local-search-pop-overlay').remove();
      $('body').css('overflow', '');
    };

    // Put a click-to-close overlay on the page, toggle the popup's visibility and
    // move focus to the query box (with auto-capitalisation and auto-correction off).
    function proceedsearch() {
      var $body = $("body");
      $body.append('<div class="search-popup-overlay local-search-pop-overlay"></div>');
      $body.css('overflow', 'hidden');

      $('.search-popup-overlay').click(onPopupClose);
      $('.popup').toggle();

      $('#local-search-input')
        .attr({ autocapitalize: "none", autocorrect: "off" })
        .focus();
    }

    // search function;
    /**
     * Download the search index, bind the search box to the in-page search and open the popup.
     *
     * Reads the globals `isXml` (index format) and sets `isfetched` once the index has arrived;
     * calls the global `proceedsearch()` to show the popup. If the download fails, the loading
     * overlay is removed and `isfetched` stays false, so the next trigger click retries.
     *
     * @param {string} path       URL the index is requested from.
     * @param {string} search_id  id of the text input holding the query.
     * @param {string} content_id id of the element that receives the rendered result list.
     */
    var searchFunc = function(path, search_id, content_id) {
      'use strict';

      // Escape a string for use as HTML text or inside a quoted attribute value.
      function escapeHtml(text) {
        return String(text)
          .replace(/&/g, '&amp;')
          .replace(/</g, '&lt;')
          .replace(/>/g, '&gt;')
          .replace(/"/g, '&quot;')
          .replace(/'/g, '&#39;');
      }

      // Pass-through used for strings that are already HTML text (see highlightKeyword).
      function keepAsIs(text) {
        return text;
      }

      // decodeURIComponent throws URIError on malformed escapes (e.g. a literal "%");
      // fall back to the undecoded URL so one bad entry cannot abort the whole search.
      function decodeUrl(url) {
        try {
          return decodeURIComponent(url);
        } catch (err) {
          return url;
        }
      }

      // Return every non-overlapping occurrence of `word` in `text` as {position, word}.
      function getIndexByWord(word, text, caseSensitive) {
        var wordLen = word.length;
        if (wordLen === 0) {
          return [];
        }
        var startPosition = 0, position, index = [];
        if (!caseSensitive) {
          text = text.toLowerCase();
          word = word.toLowerCase();
        }
        while ((position = text.indexOf(word, startPosition)) > -1) {
          index.push({position: position, word: word});
          startPosition = position + wordLen;
        }
        return index;
      }

      // Render text[slice.start, slice.end) with every hit wrapped in <b class="search-keyword">.
      // `encode` is applied to each piece of text separately so hit positions stay valid.
      function highlightKeyword(text, slice, encode) {
        var result = '';
        var prevEnd = slice.start;
        slice.hits.forEach(function (hit) {
          result += encode(text.substring(prevEnd, hit.position));
          var end = hit.position + hit.length;
          result += '<b class="search-keyword">' + encode(text.substring(hit.position, end)) + '</b>';
          prevEnd = end;
        });
        result += encode(text.substring(prevEnd, slice.end));
        return result;
      }

      // Take down the loading overlay and restore the body's overflow.
      function removeLoadingOverlay() {
        $(".local-search-pop-overlay").remove();
        $('body').css('overflow', '');
      }

      // start loading animation
      $("body")
        .append('<div class="search-popup-overlay local-search-pop-overlay">' +
          '<div id="search-loading-icon">' +
          '<i class="fa fa-spinner fa-pulse fa-5x fa-fw"></i>' +
          '</div>' +
          '</div>')
        .css('overflow', 'hidden');
      $("#search-loading-icon").css('margin', '20% auto 0 auto').css('text-align', 'center');

      $.ajax({
        url: path,
        dataType: isXml ? "xml" : "json",
        async: true,
        error: function() {
          // Without this the spinner overlay would stay on screen with the body's overflow
          // still hidden, because nothing else removes it when the request fails.
          removeLoadingOverlay();
        },
        success: function(res) {
          // get the contents from search data
          isfetched = true;
          $('.popup').detach().appendTo('.header-inner');
          var datas = isXml ? $("entry", res).map(function() {
            return {
              title: $("title", this).text(),
              content: $("content",this).text(),
              url: $("url" , this).text()
            };
          }).get() : res;
          var input = document.getElementById(search_id);
          var resultContent = document.getElementById(content_id);
          var inputEventFunction = function() {
            var searchText = input.value.trim().toLowerCase();
            var keywords = searchText.split(/[\s\-]+/);
            if (keywords.length > 1) {
              // the full phrase counts as an extra keyword so exact matches rank higher
              keywords.push(searchText);
            }
            var resultItems = [];
            if (searchText.length > 0) {
              // perform local searching
              datas.forEach(function(data) {
                var isMatch = false;
                var hitCount = 0;
                var searchTextCount = 0;
                // JSON entries may lack a field; treat a missing one as empty text
                var title = (data.title || '').trim();
                var titleInLowerCase = title.toLowerCase();
                var content = (data.content || '').trim().replace(/<[^>]+>/g,"");
                var contentInLowerCase = content.toLowerCase();
                // escaped because it is placed inside a single-quoted href attribute below
                var articleUrl = escapeHtml(decodeUrl(data.url));
                var indexOfTitle = [];
                var indexOfContent = [];

                // Consume the hits of `index` that fit inside [start, end) and return them as
                // one slice. `index` is sorted by position descending, so pop() yields the
                // earliest remaining hit; hits overlapping an accepted one are discarded.
                // Also adds the slice's full-phrase matches to this article's searchTextCount.
                function mergeIntoSlice(start, end, index) {
                  var item = index[index.length - 1];
                  var position = item.position;
                  var word = item.word;
                  var hits = [];
                  var searchTextCountInSlice = 0;
                  while (position + word.length <= end && index.length != 0) {
                    if (word === searchText) {
                      searchTextCountInSlice++;
                    }
                    hits.push({position: position, length: word.length});
                    var wordEnd = position + word.length;

                    // move to next position of hit

                    index.pop();
                    while (index.length != 0) {
                      item = index[index.length - 1];
                      position = item.position;
                      word = item.word;
                      if (wordEnd > position) {
                        index.pop();
                      } else {
                        break;
                      }
                    }
                  }
                  searchTextCount += searchTextCountInSlice;
                  return {
                    hits: hits,
                    start: start,
                    end: end,
                    searchTextCount: searchTextCountInSlice
                  };
                }

                // only match articles with not empty titles
                if (title != '') {
                  keywords.forEach(function(keyword) {
                    indexOfTitle = indexOfTitle.concat(getIndexByWord(keyword, titleInLowerCase, false));
                    indexOfContent = indexOfContent.concat(getIndexByWord(keyword, contentInLowerCase, false));
                  });
                  if (indexOfTitle.length > 0 || indexOfContent.length > 0) {
                    isMatch = true;
                    hitCount = indexOfTitle.length + indexOfContent.length;
                  }
                }

                // show search results

                if (isMatch) {
                  // sort index by position of keyword (descending; at equal positions the
                  // longest word ends up last, i.e. is popped first)

                  [indexOfTitle, indexOfContent].forEach(function (index) {
                    index.sort(function (itemLeft, itemRight) {
                      if (itemRight.position !== itemLeft.position) {
                        return itemRight.position - itemLeft.position;
                      } else {
                        return itemLeft.word.length - itemRight.word.length;
                      }
                    });
                  });

                  // merge hits into slices

                  var slicesOfTitle = [];
                  if (indexOfTitle.length != 0) {
                    slicesOfTitle.push(mergeIntoSlice(0, title.length, indexOfTitle));
                  }

                  var slicesOfContent = [];
                  while (indexOfContent.length != 0) {
                    var item = indexOfContent[indexOfContent.length - 1];
                    var position = item.position;
                    var word = item.word;
                    // cut out 100 characters
                    var start = position - 20;
                    var end = position + 80;
                    if (start < 0) {
                      start = 0;
                    }
                    if (end < position + word.length) {
                      end = position + word.length;
                    }
                    if (end > content.length) {
                      end = content.length;
                    }
                    slicesOfContent.push(mergeIntoSlice(start, end, indexOfContent));
                  }

                  // sort slices in content by search text's count and hits' count

                  slicesOfContent.sort(function (sliceLeft, sliceRight) {
                    if (sliceLeft.searchTextCount !== sliceRight.searchTextCount) {
                      return sliceRight.searchTextCount - sliceLeft.searchTextCount;
                    } else if (sliceLeft.hits.length !== sliceRight.hits.length) {
                      return sliceRight.hits.length - sliceLeft.hits.length;
                    } else {
                      return sliceLeft.start - sliceRight.start;
                    }
                  });

                  // select top N slices in content

                  var upperBound = parseInt('1', 10);
                  if (upperBound >= 0) {
                    slicesOfContent = slicesOfContent.slice(0, upperBound);
                  }

                  // highlight title and content

                  var resultItem = '';

                  // The title is plain text, so it is HTML-escaped before insertion.
                  if (slicesOfTitle.length != 0) {
                    resultItem += "<li><a href='" + articleUrl + "' class='search-result-title'>" + highlightKeyword(title, slicesOfTitle[0], escapeHtml) + "</a>";
                  } else {
                    resultItem += "<li><a href='" + articleUrl + "' class='search-result-title'>" + escapeHtml(title) + "</a>";
                  }

                  // The content is tag-stripped HTML that may still carry character references
                  // (assumed from the index holding rendered post HTML — confirm), so it is
                  // inserted as-is; escaping it again would display them literally.
                  slicesOfContent.forEach(function (slice) {
                    resultItem += "<a href='" + articleUrl + "'>" +
                      "<p class=\"search-result\">" + highlightKeyword(content, slice, keepAsIs) +
                      "...</p>" + "</a>";
                  });

                  resultItem += "</li>";
                  resultItems.push({
                    item: resultItem,
                    searchTextCount: searchTextCount,
                    hitCount: hitCount,
                    id: resultItems.length
                  });
                }
              });
            }
            if (keywords.length === 1 && keywords[0] === "") {
              // empty query: show the idle icon
              resultContent.innerHTML = '<div id="no-result"><i class="fa fa-search fa-5x"></i></div>';
            } else if (resultItems.length === 0) {
              resultContent.innerHTML = '<div id="no-result"><i class="fa fa-frown-o fa-5x"></i></div>';
            } else {
              // rank by full-phrase matches, then total hits, then later entries first
              resultItems.sort(function (resultLeft, resultRight) {
                if (resultLeft.searchTextCount !== resultRight.searchTextCount) {
                  return resultRight.searchTextCount - resultLeft.searchTextCount;
                } else if (resultLeft.hitCount !== resultRight.hitCount) {
                  return resultRight.hitCount - resultLeft.hitCount;
                } else {
                  return resultRight.id - resultLeft.id;
                }
              });
              var searchResultList = '<ul class=\"search-result-list\">';
              resultItems.forEach(function (result) {
                searchResultList += result.item;
              });
              searchResultList += "</ul>";
              resultContent.innerHTML = searchResultList;
            }
          };

          // Trigger mode: the left-hand literal is a generated setting. 'auto' searches on
          // every input event; any other value searches on icon click or Enter.
          if ('auto' === 'auto') {
            input.addEventListener('input', inputEventFunction);
          } else {
            $('.search-icon').click(inputEventFunction);
            input.addEventListener('keypress', function (event) {
              if (event.keyCode === 13) {
                inputEventFunction();
              }
            });
          }

          // remove loading animation
          removeLoadingOverlay();

          proceedsearch();
        }
      });
    };

    // handle and trigger popup window;
    // Any trigger opens the popup; until the index has been fetched, the click
    // starts the download instead and the popup opens when it completes.
    $('.popup-trigger').click(function(e) {
      e.stopPropagation();
      if (isfetched !== false) {
        proceedsearch();
        return;
      }
      searchFunc(path, 'local-search-input', 'local-search-result');
    });

    // The close button dismisses the popup; clicks inside the popup are not
    // propagated to ancestor elements.
    $('.popup-btn-close').click(onPopupClose);
    $('.popup').click(function(e){
      e.stopPropagation();
    });

    // Releasing Escape (key code 27) dismisses the popup while it is visible.
    $(document).on('keyup', function (event) {
      if (event.which !== 27) {
        return;
      }
      if (!$('.search-popup').is(':visible')) {
        return;
      }
      onPopupClose();
    });
  </script>





  

  

  

  
  

  

  

  

</body>
</html>
